import requests
import re
import codecs




url = 'http://maoyan.com/board/4'

for i in range(0,10):
    data = {'offset':i*10}

    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        }
    res = requests.get(url,params=data,headers=headers)

    html = res.content.decode('utf-8')

    #名次正则
    indexpat = """ <i class="board-index .*>(.*?)</i>"""
    #电影名正则
    titlepat = """<p class="name"><a .*>(.*?)</a></p>"""
    #主演正则
    starpat = """<p class="star">\n\s*(.*?)\n\s*</p>"""
    #上映时间正则
    relpat = """<p class="releasetime">(.*?)</p>\s*</div>"""
    #评分正则
    scorepat = """<i class="integer">(.*?)</i><i class="fraction">(.*?)</i>"""

    picpat = '<img data-src="(.*?)" alt=".*" class="board-img" />'

    indexlist = re.findall(indexpat,html)
    
    titlelist = re.findall(titlepat,html)

    starlist = re.findall(starpat,html)

    rellist = re.findall(relpat,html)

    scorelist = re.findall(scorepat,html)

    piclist = re.findall(picpat,html)
    '''
    print(scorelist)
    for i in scorelist:
        print(i[0]+""+i[1])
        
    '''

    for i in range(0,10):
        print("{:10}|{:10}|{:10}|{:10}|{:1}{:1}".format(indexlist[i],titlelist[i],starlist[i],rellist[i],scorelist[i][0],scorelist[i][1]))
    f = codecs.open('output.html', 'a', 'utf-8')
    for i in range(0,10):
    	f.write("| {:<20} | {} | {} | {} | {}{} | <a href=\"{}\">{}</a><br/>".format(indexlist[i],titlelist[i],starlist[i],rellist[i],scorelist[i][0],scorelist[i][1],piclist[i],piclist[i]))
    f.close()
